To install the required packages to run the code below please execute the follwing code.
deps <- c("gelnet","dplyr","gdata")
for(pkg in deps) if (!pkg %in% installed.packages()) install.packages(pkg, dependencies = TRUE)library(gelnet)
library(dplyr)
library(gdata)# load PCBC metadata (contains group info)
load("pcbc.pd.f.Rda")
pcbc.pd.f# load PCBC 450K data (subset of 219 probes of interest)
load("pcbc.data.Rda")
pcbc.data## Mean-center the data
m <- apply(pcbc.data, 1, mean )
pcbc.data.2 <- pcbc.data - m
# Define PCBC groups (SC and non.SC)
M1_smp <- pcbc.pd.f[pcbc.pd.f$Diffname_short %in% "SC",] #SC
M2_smp <- pcbc.pd.f[!(pcbc.pd.f$Diffname_short %in% "SC"),] #non-SC
# Select PCBC data
X.tr <- pcbc.data.2[, as.character(M1_smp$UID)] # 44 samples
X.bk <- pcbc.data.2[, as.character(M2_smp$UID)] # 55 samples
## Train a one-class model
mm <- gelnet( t(X.tr), NULL, 0, 1 ) #NULL for a one-class task Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.3320004
Iteration 3 : f = 0.3263175
## Store the signature to a file
save(mm, file = "pcbc-stemsig.p219.Rda")
# Cross-validation with linear model:
## Perform leave-one-out cross-validation
auc <- c()
for( i in 1:ncol(X.tr) ) {
## Train a model on non-left-out data
X1 <- X.tr[,-i]
X1 <- as.matrix(X1)
K <- t(X1) %*% X1 / nrow(X1)
m1 <- gelnet.ker( K, NULL, lambda=1 )
w1 <- X1 %*% m1$v
## Score the left-out sample against the background
X.bk <- X.bk[rownames(X.tr),]
X.bk <- as.matrix(X.bk)
s.bk <- t(w1) %*% X.bk
s.bk <- unmatrix(s.bk)
s1 <- t(w1) %*% X.tr[,i]
s1 <- unmatrix(s1)
## AUC = P( left-out sample is scored above the background )
auc[i] <- sum( s1 > s.bk ) / length(s.bk)
cat( "Current AUC: ", auc[i], "\n" )
cat( "Average AUC: ", mean(auc), "\n" )
}Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879741
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6878262
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879694
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879539
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.687956
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879825
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879637
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879255
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879657
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879444
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879578
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879635
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879582
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879487
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.687947
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879496
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879388
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879559
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879396
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879534
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6878202
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879763
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879253
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879475
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6878935
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879382
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879424
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879736
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879895
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879299
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879542
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879551
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879409
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879349
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879832
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.68797
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879832
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879171
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879451
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879434
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879494
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.6879603
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.687948
Current AUC: 1
Average AUC: 1
Training a one-class model
Iteration 1 : f = 0.6931472
Iteration 2 : f = 0.687957
Current AUC: 1
Average AUC: 1
# ## Uses the signature to score PanCan33 data
# load TCGA 450K data (subset of 219 probes of interest)
load("data.pan.Rda")
data.panload("type.info.Rda") #(contains tumor type info)
type.info# Function to replace NA values with median of probe values by tumor type
source("replaceNA.R")
testset <- replace.NA(data.pan, type.info, by="median") ## Load the signature
load("pcbc-stemsig.p219.Rda")
w <- mm$w
X <- testset[as.character(names(w)),]
X <- as.matrix(X)
## Score via linear model
ss <- t(w) %*% X
## Scale the scores to be between 0 and 1
ss <- ss - min(ss)
ss <- ss / max(ss)
ss <- as.data.frame(t(ss))
colnames(ss) <- "mDNAsi"
save(ss, file="TCGA_mDNAsi.Rda")